import cv2
import gym
import highway_env
#from Python.highway-env-master import setup.py
import sys

import highway_env
from stable_baselines3 import DDPG
import torch
import pprint
import winsound #to make beep when learning is done



# Video output: frames are (width, height) = (640, 280), written at 4 fps
# with the DIVX codec.
# Alternative output file: 'video-DQN-baseline3.avi'
frameSize = (640, 280)
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('video-DDPG1.avi', fourcc, 4, frameSize)



#Change which environment to use here.
#Remember to also change the names used in model.save and model.load,
#and the tensorboard log directory.

# Alternative environment: gym.make("racetrack")
env = gym.make("intersection-v1")

# Rendering size in pixels.
# The option "normalize_reward": False is currently left disabled.
render_config = {"screen_width": 640, "screen_height": 280}
env.configure(render_config)

env.reset()

# Print the environment configuration for inspection.
pprint.pprint(env.config)

# DDPG hyperparameters, collected in one mapping so they are easy to tweak.
# Optional arguments left at their defaults: action_noise,
# replay_buffer_class, replay_buffer_kwargs, seed.
ddpg_params = dict(
    policy_kwargs=dict(net_arch=[256, 256]),
    learning_rate=0.001,  # alternative value: 5e-4
    buffer_size=15000,
    learning_starts=100,
    batch_size=32,
    tau=0.005,  # between 0 and 1
    gamma=0.99,
    train_freq=(1, 'episode'),
    # How many gradient steps to do after each rollout (see train_freq).
    # Setting it to -1 means doing as many gradient steps as steps done
    # in the environment during the rollout.
    gradient_steps=1,
    # Enables a memory-efficient variant of the replay buffer at the cost
    # of more complexity.
    optimize_memory_usage=False,
    # Verbosity level: 0 for no output, 1 for info messages (such as the
    # device or wrappers used), 2 for debug messages.
    verbose=1,
    # Device (cpu, cuda, ...) on which the code should run. With 'auto',
    # the code runs on the GPU if possible.
    device='auto',
    tensorboard_log="intersection_ddpg/",
)

model = DDPG('MlpPolicy', env, **ddpg_params)


# Train a new model. Comment this section out to skip training and only
# evaluate a previously saved model.

model.learn(total_timesteps=int(1e3), progress_bar=True)

# Save before beeping: winsound.Beep raises RuntimeError when the system
# cannot beep, and that must not cost us the freshly trained model.
# Change the name of model.save when switching environments:
model.save("intersection_ddpg/model")

# Audible signal that learning is done (440 Hz for 500 ms).
winsound.Beep(440, 500)

# print()
# print("Done Learning!!")
# print()





########## Load and test saved model##############


#Change name of model.load:

# Load the saved model and run it for 40 episodes, recording every rendered
# frame to the video writer `out`.
model = DDPG.load("intersection_ddpg/model")
try:
    for _episode in range(40):
        done = truncated = False
        obs, info = env.reset()
        while not (done or truncated):
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, truncated, info = env.step(action)

            # NOTE(review): this first render() call may be redundant with
            # the rgb_array call below; kept to preserve the original
            # rendering behavior. Confirm before removing.
            env.render()
            cur_frame = env.render(mode="rgb_array")
            # The rendered frame is RGB, but cv2.VideoWriter expects BGR;
            # convert so the recorded colors are not swapped.
            out.write(cv2.cvtColor(cur_frame, cv2.COLOR_RGB2BGR))
finally:
    # Always finalize the video file, even if an episode raises.
    out.release()

print('DONE')


#print(env_reward())

#NOTE
#`rewards` gives the reward along different categories;
#`reward` combines the values from `rewards` into one value.
#`reward` is calculated using the config and `rewards`.

